Set working directory

Load libraries

load .RData

## Observations: 712
## Variables: 72
## $ CowID                  <fct> 276000941090648, 276000951311013, 2760009…
## $ Cow.Birth.Date         <date> 2007-10-19, 2016-05-31, 2016-05-31, 2012…
## $ Farm.Name              <fct> Stadler, Stadler, Stadler, Stadler, Stadl…
## $ Farm.No                <fct> 14184137142, 14184137142, 14184137142, 14…
## $ Cow.Breed              <fct> 01, 01, 01, 01, 01, 01, 01, 01, 01, 01, 0…
## $ Cow.Exit.Date          <date> NA, NA, NA, NA, NA, NA, NA, NA, 2018-09-…
## $ BS.DATE                <date> 2018-11-06, 2018-07-24, 2018-08-07, 2018…
## $ BS.Month               <fct> 11-2018, 07-2018, 08-2018, 12-2018, 12-20…
## $ BS.MS.Date.Difference  <dbl> 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1,…
## $ BS.DIM                 <dbl> 21, 15, 29, 16, 16, 28, 11, 22, 6, 13, 13…
## $ Hapto.Result           <dbl> 597.5, 280.0, 741.5, 280.0, 1903.5, 11584…
## $ Hapto.Log              <dbl> 6.392754, 5.634790, 6.608675, 5.634790, 7…
## $ Hapto.0.1              <fct> 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0,…
## $ Hapto0.35              <fct> 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,…
## $ Hapto.HML              <fct> 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 2, 0,…
## $ Hapto.HL               <fct> 1, 1, 1, 1, 1, NA, 1, 1, NA, NA, 1, 1, 3,…
## $ BS.NEFA                <dbl> 0.23, 0.68, 0.48, 0.32, 0.33, 0.22, 0.50,…
## $ BS.BHBA                <dbl> 1.35, 1.49, 1.78, 1.46, 1.26, 0.98, 0.71,…
## $ MS.Date.Taken          <date> 2018-11-05, 2018-07-23, 2018-08-07, 2018…
## $ MS.Time.Taken          <chr> "06:42:00", "12:00:00", "12:00:00", "01:0…
## $ Calving.Date           <date> 2018-10-16, 2018-07-09, 2018-07-09, 2018…
## $ Calving.No             <dbl> 9, 1, 1, 5, 7, 9, 4, 6, 2, 5, 1, 1, 1, 6,…
## $ Hfr.or.Cow             <fct> cow, hfr, hfr, cow, cow, cow, cow, cow, c…
## $ MS.DIM                 <dbl> 20, 14, 29, 15, 15, 27, 11, 21, 5, 12, 12…
## $ MS.Date.Analyzed       <date> 2018-11-07, 2018-07-26, 2018-08-08, 2018…
## $ Milk.Yield             <dbl> 39.9, 23.3, NA, 41.6, 30.4, 41.6, NA, 40.…
## $ MS.Milk.Yield          <dbl> 21.2, 23.3, 14.9, 20.7, 11.9, 17.0, 12.5,…
## $ MS.Fat                 <dbl> 4.32, 4.70, 4.21, 6.12, 5.04, 5.61, 3.82,…
## $ MS.Protein             <dbl> 2.79, 3.05, 2.85, 3.42, 2.89, 2.76, 3.30,…
## $ MS.S.Cell.Count        <dbl> 16, 298, 231, 21, 10, 21, 19, 10, 249, 66…
## $ MS.Lactose             <dbl> 4.73, 4.86, 4.97, 4.82, 4.81, 4.80, 4.83,…
## $ MS.Urea                <dbl> 24.4, 52.2, 26.0, 28.1, 20.0, 30.0, 16.5,…
## $ MS.pH                  <dbl> 6.64, 6.65, 6.68, 6.66, 6.66, 6.64, 6.70,…
## $ MS.Acetone             <dbl> 10, 140, 140, 170, 120, 60, 90, NA, 270, …
## $ MS.BHBA                <dbl> 20, 90, 90, 170, 90, 70, 50, 10, 170, 40,…
## $ MS.NEFA                <dbl> 6.30, 9.90, 8.00, 4.90, 4.00, 5.20, 6.60,…
## $ MS.NSFA                <dbl> 14.2, 19.6, 15.9, 22.9, 19.9, 20.1, 15.7,…
## $ MS.MFA                 <dbl> 1.142, 1.653, 1.319, 1.878, 1.657, 1.641,…
## $ MS.PFA                 <dbl> 0.144, 0.194, 0.137, 0.207, 0.182, 0.193,…
## $ MS.SFA                 <dbl> 2.653, 2.569, 2.435, 3.726, 2.868, 3.375,…
## $ MS.Palmeic             <dbl> 1.172, 1.094, 1.115, 1.422, 1.235, 1.410,…
## $ MS.Stearine            <dbl> 0.471, 0.653, 0.534, 0.735, 0.637, 0.636,…
## $ MS.Oleic               <dbl> 1.061, 1.596, 1.199, 1.802, 1.534, 1.582,…
## $ CE.Date                <date> 2018-11-06, 2018-07-24, 2018-08-07, 2018…
## $ CE.Temp                <dbl> 38.2, 38.7, 38.4, 38.2, 38.5, 38.0, 38.1,…
## $ CE.Environ.Temp        <dbl> 8.0, 19.3, 22.4, 3.9, 11.9, 6.0, 22.4, 3.…
## $ CE.Fat.Level           <dbl> 12, 19, 12, 16, 13, 12, 19, 20, 17, 35, 2…
## $ CE.Skin.Dehydration    <fct> phy, phy, phy, phy, phy, phy, phy, phy, p…
## $ CE.Stom.Tension        <fct> phy, phy, phy, phy, phy, phy, phy, phy, p…
## $ CE.Stom.Layering       <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ CE.Stom.Fluid.Movement <fct> obb, obb, obb, obb, obb, obb, obb, obb, o…
## $ CE.Stom.Ping           <fct> obb, obb, obb, obb, obb, obb, obb, obb, o…
## $ CE.Stom.Fullness       <fct> norm, abnorm, abnorm, abnorm, abnorm, nor…
## $ CE.Stom.Noise.Freq     <fct> NA, NA, NA, NA, NA, NA, NA, NA, bis_2, NA…
## $ CE.Waste.Consistency   <fct> norm, norm, norm, norm, norm, norm, norm,…
## $ CE.Waste.Digestion     <fct> maess, gut, gut, maess, gut, maess, gut, …
## $ CE.Locomotion.Score    <fct> 3, 1, 1, 2, 1, 3, 2, 2, 3, 2, 1, 1, 2, 1,…
## $ CE.Lame                <fct> 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0,…
## $ BS.NEFA.Log            <dbl> -1.4696760, -0.3856625, -0.7339692, -1.13…
## $ BS.BHBA.Log            <dbl> 0.30010459, 0.39877612, 0.57661336, 0.378…
## $ MS.S.Cell.Count.Log    <dbl> 2.772589, 5.697093, 5.442418, 3.044522, 2…
## $ MS.BHBA.Log            <dbl> 2.995732, 4.499810, 4.499810, 5.135798, 4…
## $ MS.Acetone.Log         <dbl> 2.302585, 4.941642, 4.941642, 5.135798, 4…
## $ BS.NEFA.0.7            <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,…
## $ BS.BHBA.1.2            <fct> 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0,…
## $ Hapto0.35.2            <fct> 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,…
## $ BS.Month2              <fct> 11-2018, 07-2018, 08-2018, 12-2018, 12-20…
## $ BS.Month_warm          <fct> 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0,…
## $ CE.Stom.Fullness01     <fct> 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,…
## $ CE.Stom.Tension01      <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ clusterHapto5          <int> 1, 2, 2, 4, 2, 1, 3, 4, 1, 3, 3, 3, 2, 4,…
## $ clusterHapto           <int> 2, 2, 2, 3, 2, 1, 2, 3, 1, 2, 2, 2, 2, 3,…

Explore Month of sampling and warm (months 7, 8, 9) vs cooler months (months: 10, 11, 12, and 6) for effect on Hapto.Log

## 
## Call:
## lm(formula = Hapto.Log ~ BS.Month, data = hp9)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -3.153 -1.604 -1.002  1.010  7.468 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       6.3227     0.3127  20.219  < 2e-16 ***
## BS.Month07-2018   1.3364     0.4050   3.300  0.00102 ** 
## BS.Month08-2018   1.8506     0.3805   4.863 1.43e-06 ***
## BS.Month09-2018   1.6428     0.4106   4.001 6.97e-05 ***
## BS.Month10-2018   0.8946     0.3778   2.368  0.01816 *  
## BS.Month11-2018   0.6158     0.3787   1.626  0.10439    
## BS.Month12-2018   0.4510     0.4372   1.032  0.30264    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.482 on 705 degrees of freedom
## Multiple R-squared:  0.0532, Adjusted R-squared:  0.04515 
## F-statistic: 6.603 on 6 and 705 DF,  p-value: 8.371e-07

## 
## Call:
## lm(formula = Hapto.Log ~ BS.Month_warm, data = hp9)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.0198 -1.5523 -1.1927  0.8327  7.4966 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      6.9099     0.1242  55.624  < 2e-16 ***
## BS.Month_warm1   1.0515     0.1880   5.594 3.16e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.488 on 710 degrees of freedom
## Multiple R-squared:  0.04222,    Adjusted R-squared:  0.04087 
## F-statistic:  31.3 on 1 and 710 DF,  p-value: 3.162e-08

Start: baseline model and full model for linear regression

## Analysis of Deviance Table (Type III Wald chisquare tests)
## 
## Response: Hapto.Log
##                         Chisq Df Pr(>Chisq)    
## (Intercept)            1.7316  1  0.1882102    
## MS.Milk.Yield          1.9544  1  0.1621112    
## MS.NEFA                0.4721  1  0.4920337    
## MS.DIM                 0.5416  1  0.4617607    
## MS.Fat                 1.4057  1  0.2357791    
## MS.Protein            10.5874  1  0.0011386 ** 
## MS.Lactose             2.9903  1  0.0837655 .  
## MS.Acetone.Log         0.3383  1  0.5608100    
## MS.Urea                2.8482  1  0.0914773 .  
## MS.S.Cell.Count.Log   16.0056  1  6.316e-05 ***
## MS.pH                  0.0006  1  0.9798728    
## CE.Environ.Temp        0.0992  1  0.7527752    
## Hfr.or.Cow             3.1194  1  0.0773656 .  
## BS.Month_warm          2.2656  1  0.1322752    
## CE.Skin.Dehydration    0.3119  1  0.5765167    
## CE.Stom.Tension01      1.3232  1  0.2500136    
## CE.Stom.Layering       0.1856  1  0.6665866    
## CE.Stom.Fullness01     0.3702  1  0.5429081    
## CE.Stom.Noise.Freq     2.0099  2  0.3660696    
## CE.Waste.Consistency   1.4452  3  0.6949762    
## CE.Lame               11.6515  1  0.0006415 ***
## BS.NEFA.0.7            0.4927  1  0.4827087    
## BS.BHBA                2.3703  1  0.1236604    
## Cow.Breed              0.5619  1  0.4535058    
## BS.NEFA.0.7:BS.BHBA    0.0239  1  0.8770401    
## MS.Milk.Yield:BS.BHBA  0.3536  1  0.5520542    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Note: there are still those linear shapes in the residual plots indicating that we are missing information explaning the hapto outcomes

Final model from Backward step elimination

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## Hapto.Log ~ MS.Milk.Yield + MS.Protein + MS.Lactose + MS.S.Cell.Count.Log +  
##     Hfr.or.Cow + BS.Month_warm + CE.Stom.Tension01 + CE.Stom.Fullness01 +  
##     CE.Lame + BS.NEFA.0.7 + (1 | CowID:Farm.No)
##    Data: hp9
## 
## REML criterion at convergence: 3133.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.7468 -0.5993 -0.2304  0.4390  3.2446 
## 
## Random effects:
##  Groups        Name        Variance Std.Dev.
##  CowID:Farm.No (Intercept) 0.6406   0.8004  
##  Residual                  4.1093   2.0271  
## Number of obs: 712, groups:  CowID:Farm.No, 423
## 
## Fixed effects:
##                      Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)          12.16543    2.90897 579.30000   4.182 3.34e-05 ***
## MS.Milk.Yield        -0.03940    0.01437 655.30000  -2.741  0.00629 ** 
## MS.Protein           -0.62638    0.28879 638.50000  -2.169  0.03045 *  
## MS.Lactose           -1.01367    0.49800 576.20000  -2.035  0.04226 *  
## MS.S.Cell.Count.Log   0.48289    0.06990 561.00000   6.908 1.33e-11 ***
## Hfr.or.Cowhfr         0.81732    0.20284 396.10000   4.029 6.71e-05 ***
## BS.Month_warm1        0.93144    0.17766 469.40000   5.243 2.40e-07 ***
## CE.Stom.Tension011    0.99395    0.32674 696.70000   3.042  0.00244 ** 
## CE.Stom.Fullness011  -0.58044    0.17424 699.90000  -3.331  0.00091 ***
## CE.Lame1              1.23968    0.26958 646.90000   4.599 5.12e-06 ***
## BS.NEFA.0.71          1.02456    0.22097 663.70000   4.637 4.27e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) MS.M.Y MS.Prt MS.Lct MS.S.C Hfr..C BS.M_1 CE.S.T CE.S.F
## MS.Milk.Yld -0.127                                                        
## MS.Protein  -0.547  0.134                                                 
## MS.Lactose  -0.935 -0.001  0.246                                          
## MS.S.Cl.C.L -0.392  0.037  0.003  0.353                                   
## Hfr.r.Cwhfr  0.032  0.218  0.145 -0.125 -0.081                            
## BS.Mnth_wr1  0.033 -0.081  0.092 -0.088 -0.084  0.000                     
## CE.Stm.T011  0.013  0.015 -0.049 -0.007 -0.016 -0.001  0.102              
## CE.Stm.F011  0.061  0.065 -0.042 -0.106  0.012 -0.125 -0.028 -0.009       
## CE.Lame1    -0.083  0.037  0.120  0.036 -0.007  0.088 -0.067 -0.137  0.088
## BS.NEFA.0.7 -0.142  0.088  0.157  0.080  0.011 -0.047 -0.070  0.042  0.193
##             CE.Lm1
## MS.Milk.Yld       
## MS.Protein        
## MS.Lactose        
## MS.S.Cl.C.L       
## Hfr.r.Cwhfr       
## BS.Mnth_wr1       
## CE.Stm.T011       
## CE.Stm.F011       
## CE.Lame1          
## BS.NEFA.0.7 -0.066

Repeat multiple variable analysis as logistic regression with Hapto0.35 as outcome variable

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## Hapto0.35 ~ MS.Milk.Yield + MS.Protein + MS.Lactose + MS.S.Cell.Count.Log +  
##     Hfr.or.Cow + BS.Month_warm + CE.Stom.Tension01 + CE.Stom.Fullness01 +  
##     CE.Lame + BS.NEFA.0.7 + BS.BHBA + BS.NEFA.0.7 * BS.BHBA +  
##     MS.Milk.Yield * BS.BHBA + Cow.Breed + (1 | CowID:Farm.No)
##    Data: hp9
## 
##      AIC      BIC   logLik deviance df.resid 
##    686.9    760.0   -327.5    654.9      696 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.1861 -0.5330 -0.3407  0.3916  6.8042 
## 
## Random effects:
##  Groups        Name        Variance  Std.Dev. 
##  CowID:Farm.No (Intercept) 5.962e-15 7.721e-08
## Number of obs: 712, groups:  CowID:Farm.No, 423
## 
## Fixed effects:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            6.08996    3.45404   1.763 0.077877 .  
## MS.Milk.Yield         -0.05925    0.03007  -1.971 0.048770 *  
## MS.Protein            -0.77501    0.35488  -2.184 0.028972 *  
## MS.Lactose            -1.27327    0.57467  -2.216 0.026715 *  
## MS.S.Cell.Count.Log    0.42384    0.07881   5.378 7.54e-08 ***
## Hfr.or.Cowhfr          0.80479    0.22288   3.611 0.000305 ***
## BS.Month_warm1         0.83619    0.20152   4.149 3.33e-05 ***
## CE.Stom.Tension011     0.52014    0.37065   1.403 0.160516    
## CE.Stom.Fullness011   -0.49354    0.20765  -2.377 0.017467 *  
## CE.Lame1               1.25210    0.28322   4.421 9.83e-06 ***
## BS.NEFA.0.71           0.77156    0.42691   1.807 0.070715 .  
## BS.BHBA               -0.81052    0.49988  -1.621 0.104921    
## Cow.Breed02            0.03740    0.22505   0.166 0.868011    
## BS.NEFA.0.71:BS.BHBA   0.15969    0.36375   0.439 0.660650    
## MS.Milk.Yield:BS.BHBA  0.02970    0.02362   1.258 0.208537    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation matrix not shown by default, as p = 15 > 12.
## Use print(x, correlation=TRUE)  or
##   vcov(x)     if you need it

## Analysis of Deviance Table (Type III Wald chisquare tests)
## 
## Response: Hapto0.35
##                         Chisq Df Pr(>Chisq)    
## (Intercept)            3.1087  1  0.0778769 .  
## MS.Milk.Yield          3.8832  1  0.0487702 *  
## MS.Protein             4.7692  1  0.0289725 *  
## MS.Lactose             4.9091  1  0.0267151 *  
## MS.S.Cell.Count.Log   28.9220  1  7.535e-08 ***
## Hfr.or.Cow            13.0386  1  0.0003051 ***
## BS.Month_warm         17.2175  1  3.333e-05 ***
## CE.Stom.Tension01      1.9694  1  0.1605159    
## CE.Stom.Fullness01     5.6488  1  0.0174669 *  
## CE.Lame               19.5448  1  9.827e-06 ***
## BS.NEFA.0.7            3.2664  1  0.0707146 .  
## BS.BHBA                2.6291  1  0.1049209    
## Cow.Breed              0.0276  1  0.8680106    
## BS.NEFA.0.7:BS.BHBA    0.1927  1  0.6606496    
## MS.Milk.Yield:BS.BHBA  1.5816  1  0.2085366    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## Hapto0.35 ~ MS.Milk.Yield + MS.Protein + MS.Lactose + MS.S.Cell.Count.Log +  
##     Hfr.or.Cow + BS.Month_warm + CE.Stom.Fullness01 + CE.Lame +  
##     BS.NEFA.0.7 + BS.BHBA.Log + (1 | CowID:Farm.No)
##    Data: hp9
## 
##      AIC      BIC   logLik deviance df.resid 
##    679.8    734.6   -327.9    655.8      700 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.2034 -0.5258 -0.3342  0.3889  5.6275 
## 
## Random effects:
##  Groups        Name        Variance Std.Dev.
##  CowID:Farm.No (Intercept) 0        0       
## Number of obs: 712, groups:  CowID:Farm.No, 423
## 
## Fixed effects:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          5.75753    3.35422   1.717 0.086070 .  
## MS.Milk.Yield       -0.02844    0.01778  -1.600 0.109666    
## MS.Protein          -0.81294    0.35211  -2.309 0.020958 *  
## MS.Lactose          -1.35465    0.56911  -2.380 0.017299 *  
## MS.S.Cell.Count.Log  0.42946    0.07857   5.466 4.60e-08 ***
## Hfr.or.Cowhfr        0.79757    0.22130   3.604 0.000313 ***
## BS.Month_warm1       0.78677    0.19851   3.963 7.39e-05 ***
## CE.Stom.Fullness011 -0.48396    0.20598  -2.350 0.018797 *  
## CE.Lame1             1.28389    0.27720   4.632 3.63e-06 ***
## BS.NEFA.0.71         0.91805    0.23945   3.834 0.000126 ***
## BS.BHBA.Log         -0.50264    0.21676  -2.319 0.020401 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) MS.M.Y MS.Prt MS.Lct MS.S.C Hfr..C BS.M_1 CE.S.F CE.Lm1
## MS.Milk.Yld -0.134                                                        
## MS.Protein  -0.576  0.140                                                 
## MS.Lactose  -0.932 -0.002  0.277                                          
## MS.S.Cl.C.L -0.357  0.035 -0.048  0.325                                   
## Hfr.r.Cwhfr  0.034  0.222  0.130 -0.142 -0.001                            
## BS.Mnth_wr1  0.022 -0.084  0.083 -0.089 -0.013  0.044                     
## CE.Stm.F011  0.075  0.064 -0.023 -0.120 -0.045 -0.166 -0.051              
## CE.Lame1    -0.080  0.021  0.100  0.020  0.091  0.159  0.022  0.072       
## BS.NEFA.0.7 -0.063  0.091  0.077 -0.004  0.063 -0.021 -0.029  0.181 -0.032
## BS.BHBA.Log -0.240 -0.043  0.276  0.210 -0.014  0.021 -0.004 -0.014  0.053
##             BS.NEF
## MS.Milk.Yld       
## MS.Protein        
## MS.Lactose        
## MS.S.Cl.C.L       
## Hfr.r.Cwhfr       
## BS.Mnth_wr1       
## CE.Stm.F011       
## CE.Lame1          
## BS.NEFA.0.7       
## BS.BHBA.Log -0.278

Start k-means clustering analysis collect all ssign variables into hp10 and format factor variables as numeric dummy variables; check pair-wise correlations again

## Observations: 712
## Variables: 10
## $ MS.Milk.Yield       <dbl> 21.2, 23.3, 14.9, 20.7, 11.9, 17.0, 12.5, 11…
## $ MS.Protein          <dbl> 2.79, 3.05, 2.85, 3.42, 2.89, 2.76, 3.30, 3.…
## $ MS.Lactose          <dbl> 4.73, 4.86, 4.97, 4.82, 4.81, 4.80, 4.83, 4.…
## $ MS.S.Cell.Count.Log <dbl> 2.772589, 5.697093, 5.442418, 3.044522, 2.30…
## $ Hfr.or.Cow          <dbl> 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,…
## $ BS.Month_warm       <dbl> 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0,…
## $ CE.Stom.Tension01   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ CE.Stom.Fullness01  <dbl> 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1,…
## $ CE.Lame             <dbl> 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,…
## $ BS.NEFA.0.7         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,…

## Observations: 712
## Variables: 10
## $ MS.Milk.Yield       <dbl> 1.040073139, 1.387694150, -0.002789896, 0.95…
## $ MS.Protein          <dbl> -1.55816442, -0.73740583, -1.36875859, 0.430…
## $ MS.Lactose          <dbl> -0.50740817, 0.16796859, 0.73944124, -0.0398…
## $ MS.S.Cell.Count.Log <dbl> -1.08085884, 1.16364429, 0.96818536, -0.8721…
## $ Hfr.or.Cow          <dbl> 0.6397853, -1.5608290, -1.5608290, 0.6397853…
## $ BS.Month_warm       <dbl> -0.8800408, 1.1347150, 1.1347150, -0.8800408…
## $ CE.Stom.Tension01   <dbl> -0.2746318, -0.2746318, -0.2746318, -0.27463…
## $ CE.Stom.Fullness01  <dbl> -1.1444851, 0.8725282, 0.8725282, 0.8725282,…
## $ CE.Lame             <dbl> 2.7509398, -0.3630016, -0.3630016, -0.363001…
## $ BS.NEFA.0.7         <dbl> -0.4943799, -0.4943799, -0.4943799, -0.49437…

PCA and k-means clustering of hp11 check for missing values first, then perform PCA, plot and summarize Note: MS.Milk.Yield will be used instead of Milk.Yield (there are 325 missing values in Milk.Yield)

##                MS.Milk.Yield    MS.Protein    MS.Lactose
## N               7.120000e+02  7.120000e+02  7.120000e+02
## mean           -1.216243e-16 -4.765935e-16  5.121147e-16
## Std.Dev.        1.000000e+00  1.000000e+00  1.000000e+00
## min            -2.403030e+00 -2.726167e+00 -1.032635e+01
## Q1             -6.483718e-01 -7.058382e-01 -5.074082e-01
## median         -2.014305e-01 -7.448544e-02  6.406448e-02
## Q3              4.441514e-01  6.200026e-01  6.355371e-01
## max             5.194972e+00  8.511912e+00  2.298003e+00
## missing values  0.000000e+00  0.000000e+00  0.000000e+00
##                MS.S.Cell.Count.Log    Hfr.or.Cow BS.Month_warm
## N                     7.120000e+02  7.120000e+02  7.120000e+02
## mean                 -1.845848e-16  2.337293e-17 -3.695820e-17
## Std.Dev.              1.000000e+00  1.000000e+00  1.000000e+00
## min                  -1.441578e+00 -1.560829e+00 -8.800408e-01
## Q1                   -7.696719e-01 -1.560829e+00 -8.800408e-01
## median               -2.063642e-01  6.397853e-01 -8.800408e-01
## Q3                    6.238880e-01  6.397853e-01  1.134715e+00
## max                   3.500504e+00  6.397853e-01  1.134715e+00
## missing values        0.000000e+00  0.000000e+00  0.000000e+00
##                CE.Stom.Tension01 CE.Stom.Fullness01       CE.Lame
## N                   7.120000e+02       7.120000e+02  7.120000e+02
## mean               -1.972882e-17      -1.551579e-16  1.830139e-17
## Std.Dev.            1.000000e+00       1.000000e+00  1.000000e+00
## min                -2.746318e-01      -1.144485e+00 -3.630016e-01
## Q1                 -2.746318e-01      -1.144485e+00 -3.630016e-01
## median             -2.746318e-01       8.725282e-01 -3.630016e-01
## Q3                 -2.746318e-01       8.725282e-01 -3.630016e-01
## max                 3.636125e+00       8.725282e-01  2.750940e+00
## missing values      0.000000e+00       0.000000e+00  0.000000e+00
##                  BS.NEFA.0.7
## N               7.120000e+02
## mean           -5.049915e-17
## Std.Dev.        1.000000e+00
## min            -4.943799e-01
## Q1             -4.943799e-01
## median         -4.943799e-01
## Q3             -4.943799e-01
## max             2.019895e+00
## missing values  0.000000e+00

## Importance of components:
##                           Comp.1    Comp.2    Comp.3    Comp.4     Comp.5
## Standard deviation     1.2744174 1.2209574 1.1242494 1.0664119 0.99498377
## Proportion of Variance 0.1626424 0.1492834 0.1265714 0.1138834 0.09913851
## Cumulative Proportion  0.1626424 0.3119258 0.4384972 0.5523806 0.65151910
##                            Comp.6     Comp.7     Comp.8     Comp.9
## Standard deviation     0.92216926 0.87169483 0.84211875 0.78936246
## Proportion of Variance 0.08515922 0.07609206 0.07101614 0.06239695
## Cumulative Proportion  0.73667832 0.81277038 0.88378652 0.94618347
##                           Comp.10
## Standard deviation     0.73308216
## Proportion of Variance 0.05381653
## Cumulative Proportion  1.00000000

Note: need 3 PC’s to reach 50% variance…not a strong clustering, but acceptable Note 2: Hapto.Log was NOT used for the clustering

these graphs are not very useful, but they show the variance explained by the clustering per PC

## [1] 712  10

save first 2 PC’s as data frame

##        2 groups    3 groups  4 groups  5 groups    6 groups    7 groups
## SSE 1498.602345 1025.879128 833.59207 697.02354 576.1348054 475.3007574
## ssi    1.075525    1.918493   1.64959   2.11894   0.6588912   0.8442455
##        8 groups   9 groups  10 groups  11 groups   12 groups   13 groups
## SSE 418.3641339 371.524826 330.331521 299.565731 272.5845031 251.0064518
## ssi   0.8779883   1.020716   1.073037   1.051912   0.9987275   0.9987275
##      14 groups  15 groups  16 groups  17 groups  18 groups  19 groups
## SSE 229.712189 212.210008 197.540219 182.555307 172.835935 164.376120
## ssi   1.418272   1.397646   1.414821   1.376215   1.357759   1.357759
##      20 groups
## SSE 155.802044
## ssi   1.662432

use 5 clusters based on maximum in ssi graph, could use 3 only the Silhouette plot is not working; and number of observations per cluster

## 
##   1   2   3   4   5 
## 173  94 195  82 168

distribution of observations over 4 clusters is quite even

## 
##   1   2   3   4   5 
## 173  94 195  82 168

Note: cluster in the middle of the other clusters is linked to almost all other clusters; it is kind of a ‘fuzzy’ cluster

cbind cluster numbers into data set hp10, i.e. the non centered, non-scaled data set

cbind cluster numbers into data set hp9, the original data set, and save as RData set, as well

Plots and descriptives of variables per cluster

## 'data.frame':    712 obs. of  5 variables:
##  $ MS.Milk.Yield      : num  21.2 23.3 14.9 20.7 11.9 17 12.5 11.4 36.8 12.8 ...
##  $ MS.Protein         : num  2.79 3.05 2.85 3.42 2.89 2.76 3.3 3.7 2.86 3.37 ...
##  $ MS.Lactose         : num  4.73 4.86 4.97 4.82 4.81 4.8 4.83 4.81 4.82 4.85 ...
##  $ MS.S.Cell.Count.Log: num  2.77 5.7 5.44 3.04 2.3 ...
##  $ clusterHapto       : int  1 2 2 4 2 1 3 4 1 3 ...

cbind the Hapto.Log and Hapto.0.35 varaibles into the data set

## Observations: 712
## Variables: 13
## $ MS.Milk.Yield       <dbl> 21.2, 23.3, 14.9, 20.7, 11.9, 17.0, 12.5, 11…
## $ MS.Protein          <dbl> 2.79, 3.05, 2.85, 3.42, 2.89, 2.76, 3.30, 3.…
## $ MS.Lactose          <dbl> 4.73, 4.86, 4.97, 4.82, 4.81, 4.80, 4.83, 4.…
## $ MS.S.Cell.Count.Log <dbl> 2.772589, 5.697093, 5.442418, 3.044522, 2.30…
## $ Hfr.or.Cow          <dbl> 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,…
## $ BS.Month_warm       <dbl> 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0,…
## $ CE.Stom.Tension01   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ CE.Stom.Fullness01  <dbl> 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1,…
## $ CE.Lame             <dbl> 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,…
## $ BS.NEFA.0.7         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,…
## $ clusterHapto        <int> 1, 2, 2, 4, 2, 1, 3, 4, 1, 3, 3, 3, 2, 4, 4,…
## $ V2                  <dbl> 6.392754, 5.634790, 6.608675, 5.634790, 7.55…
## $ V3                  <fct> 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0,…
## 'data.frame':    712 obs. of  6 variables:
##  $ MS.Milk.Yield      : num  21.2 23.3 14.9 20.7 11.9 17 12.5 11.4 36.8 12.8 ...
##  $ MS.Protein         : num  2.79 3.05 2.85 3.42 2.89 2.76 3.3 3.7 2.86 3.37 ...
##  $ MS.Lactose         : num  4.73 4.86 4.97 4.82 4.81 4.8 4.83 4.81 4.82 4.85 ...
##  $ MS.S.Cell.Count.Log: num  2.77 5.7 5.44 3.04 2.3 ...
##  $ clusterHapto       : int  1 2 2 4 2 1 3 4 1 3 ...
##  $ Hapto.Log          : num  6.39 5.63 6.61 5.63 7.55 ...

Note: could add any other numeric variable of interest to the graph

descriptives by cluster for MS.Milk.Yield

## $`1`
##                     [,1]
## N              94.000000
## mean           16.680851
## Std.Dev.        7.509224
## min             2.100000
## Q1             11.600000
## median         15.450000
## Q3             20.225000
## max            46.300000
## missing values  0.000000
## 
## $`2`
##                      [,1]
## N              168.000000
## mean            14.112500
## Std.Dev.         4.841329
## min              0.600000
## Q1              11.000000
## median          13.300000
## Q3              15.700000
## max             40.600000
## missing values   0.000000
## 
## $`3`
##                      [,1]
## N              173.000000
## mean            16.193642
## Std.Dev.         6.613799
## min              4.200000
## Q1              11.700000
## median          14.900000
## Q3              18.900000
## max             40.900000
## missing values   0.000000
## 
## $`4`
##                     [,1]
## N              195.00000
## mean            12.99692
## Std.Dev.         4.59189
## min              0.40000
## Q1              10.10000
## median          12.20000
## Q3              15.10000
## max             32.70000
## missing values   0.00000
## 
## $`5`
##                     [,1]
## N              82.000000
## mean           16.414634
## Std.Dev.        6.656439
## min             1.400000
## Q1             12.500000
## median         15.300000
## Q3             18.700000
## max            36.200000
## missing values  0.000000

descriptives by cluster for MS.Protein

## $`1`
##                      [,1]
## N              94.0000000
## mean            3.0201064
## Std.Dev.        0.2232482
## min             2.4200000
## Q1              2.8700000
## median          3.0100000
## Q3              3.1600000
## max             3.6200000
## missing values  0.0000000
## 
## $`2`
##                       [,1]
## N              168.0000000
## mean             3.1285119
## Std.Dev.         0.2256292
## min              2.6200000
## Q1               2.9575000
## median           3.1400000
## Q3               3.2725000
## max              3.6800000
## missing values   0.0000000
## 
## $`3`
##                       [,1]
## N              173.0000000
## mean             3.2245665
## Std.Dev.         0.2253459
## min              2.7400000
## Q1               3.0600000
## median           3.2100000
## Q3               3.3700000
## max              3.9200000
## missing values   0.0000000
## 
## $`4`
##                       [,1]
## N              195.0000000
## mean             3.5203077
## Std.Dev.         0.2495251
## min              2.9300000
## Q1               3.3700000
## median           3.5300000
## Q3               3.6800000
## max              4.2100000
## missing values   0.0000000
## 
## $`5`
##                      [,1]
## N              82.0000000
## mean            3.4650000
## Std.Dev.        0.3944718
## min             2.8100000
## Q1              3.2750000
## median          3.4600000
## Q3              3.6175000
## max             5.9800000
## missing values  0.0000000

descriptives by cluster for MS.Lactose

## $`1`
##                      [,1]
## N              94.0000000
## mean            4.8218085
## Std.Dev.        0.1557705
## min             4.2400000
## Q1              4.7325000
## median          4.8300000
## Q3              4.9200000
## max             5.1500000
## missing values  0.0000000
## 
## $`2`
##                      [,1]
## N              168.000000
## mean             4.975893
## Std.Dev.         0.124676
## min              4.660000
## Q1               4.877500
## median           4.970000
## Q3               5.070000
## max              5.270000
## missing values   0.000000
## 
## $`3`
##                      [,1]
## N              173.000000
## mean             4.846243
## Std.Dev.         0.116276
## min              4.520000
## Q1               4.770000
## median           4.840000
## Q3               4.920000
## max              5.100000
## missing values   0.000000
## 
## $`4`
##                       [,1]
## N              195.0000000
## mean             4.7905641
## Std.Dev.         0.1500934
## min              4.2500000
## Q1               4.7050000
## median           4.8100000
## Q3               4.8900000
## max              5.1300000
## missing values   0.0000000
## 
## $`5`
##                      [,1]
## N              82.0000000
## mean            4.5797561
## Std.Dev.        0.2660269
## min             2.8400000
## Q1              4.5125000
## median          4.6500000
## Q3              4.7100000
## max             4.9600000
## missing values  0.0000000

descriptives by cluster for MS.S.Cell.Count

## $`1`
##                     [,1]
## N              94.000000
## mean            4.118352
## Std.Dev.        1.190854
## min             2.302585
## Q1              3.146134
## median          3.794921
## Q3              5.213506
## max             7.602401
## missing values  0.000000
## 
## $`2`
##                       [,1]
## N              168.0000000
## mean             3.5054926
## Std.Dev.         0.9411548
## min              2.3025851
## Q1               2.8180572
## median           3.3140207
## Q3               4.0253517
## max              6.9641356
## missing values   0.0000000
## 
## $`3`
##                      [,1]
## N              173.000000
## mean             4.063414
## Std.Dev.         1.059461
## min              2.302585
## Q1               3.258097
## median           3.970292
## Q3               4.634729
## max              7.540090
## missing values   0.000000
## 
## $`4`
##                      [,1]
## N              195.000000
## mean             4.246548
## Std.Dev.         1.279004
## min              2.302585
## Q1               3.258097
## median           4.007333
## Q3               5.071803
## max              8.119696
## missing values   0.000000
## 
## $`5`
##                     [,1]
## N              82.000000
## mean            5.728184
## Std.Dev.        1.316079
## min             2.890372
## Q1              4.884269
## median          5.798721
## Q3              6.354653
## max             8.741935
## missing values  0.000000

descriptives by cluster for Hapto.Log

## $`1`
##                     [,1]
## N              94.000000
## mean            9.208024
## Std.Dev.        3.274849
## min             5.010635
## Q1              5.972134
## median          8.558402
## Q3             12.671557
## max            14.756045
## missing values  0.000000
## 
## $`2`
##                      [,1]
## N              168.000000
## mean             6.728636
## Std.Dev.         1.788273
## min              4.976734
## Q1               5.634790
## median           5.938851
## Q3               7.415679
## max             13.774741
## missing values   0.000000
## 
## $`3`
##                      [,1]
## N              173.000000
## mean             7.202421
## Std.Dev.         2.321140
## min              4.962845
## Q1               5.634790
## median           6.061457
## Q3               8.055951
## max             14.406542
## missing values   0.000000
## 
## $`4`
##                      [,1]
## N              195.000000
## mean             6.739156
## Std.Dev.         2.137796
## min              4.948760
## Q1               5.634790
## median           5.634790
## Q3               6.830103
## max             13.778172
## missing values   0.000000
## 
## $`5`
##                     [,1]
## N              82.000000
## mean            8.423959
## Std.Dev.        2.902437
## min             4.941642
## Q1              5.758496
## median          7.392439
## Q3             10.751709
## max            14.510207
## missing values  0.000000

repeat graphs and descriptive stats for the factor variables

## 'data.frame':    712 obs. of  7 variables:
##  $ Hfr.or.Cow        : num  1 0 0 1 1 1 1 1 1 1 ...
##  $ BS.Month_warm     : num  0 1 1 0 0 0 1 0 1 1 ...
##  $ CE.Stom.Tension01 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CE.Stom.Fullness01: num  0 1 1 1 1 0 0 0 0 1 ...
##  $ CE.Lame           : num  1 0 0 0 0 1 0 0 1 0 ...
##  $ BS.NEFA.0.7       : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ clusterHapto      : int  1 2 2 4 2 1 3 4 1 3 ...

## , ,  = Hfr.or.Cow
## 
##    
##         1     2     3     4     5
##   0  3.90 10.52  2.70  4.02  0.81
##   1 12.77  6.15 13.97 12.65 15.85
## 
## , ,  = BS.Month_warm
## 
##    
##         1     2     3     4     5
##   0  3.90  6.75  9.25 14.44  9.35
##   1 12.77  9.92  7.42  2.22  7.32
## 
## , ,  = CE.Stom.Tension01
## 
##    
##         1     2     3     4     5
##   0 16.13 16.17 15.80 15.04 13.82
##   1  0.53  0.50  0.87  1.62  2.85
## 
## , ,  = CE.Stom.Fullness01
## 
##    
##         1     2     3     4     5
##   0 14.18  1.39 10.40  3.33 13.62
##   1  2.48 15.28  6.26 13.33  3.05
## 
## , ,  = CE.Lame
## 
##    
##         1     2     3     4     5
##   0  8.33 16.57 14.84 16.58 13.62
##   1  8.33  0.10  1.83  0.09  3.05
## 
## , ,  = BS.NEFA.0.7
## 
##    
##         1     2     3     4     5
##   0  5.50 15.08 12.81 16.67 12.40
##   1 11.17  1.59  3.85  0.00  4.27
## 'data.frame':    60 obs. of  4 variables:
##  $ levels      : Factor w/ 2 levels "0","1": 1 2 1 2 1 2 1 2 1 2 ...
##  $ clusterHapto: Factor w/ 5 levels "1","2","3","4",..: 1 1 2 2 3 3 4 4 5 5 ...
##  $ variable    : Factor w/ 6 levels "Hfr.or.Cow","BS.Month_warm",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ proportion  : num  3.9 12.77 10.52 6.15 2.7 ...

## , ,  = Hfr.or.Cow
## 
##    
##       1   2   3   4   5
##   0  22 106  28  47   4
##   1  72  62 145 148  78
## 
## , ,  = BS.Month_warm
## 
##    
##       1   2   3   4   5
##   0  22  68  96 169  46
##   1  72 100  77  26  36
## 
## , ,  = CE.Stom.Tension01
## 
##    
##       1   2   3   4   5
##   0  91 163 164 176  68
##   1   3   5   9  19  14
## 
## , ,  = CE.Stom.Fullness01
## 
##    
##       1   2   3   4   5
##   0  80  14 108  39  67
##   1  14 154  65 156  15
## 
## , ,  = CE.Lame
## 
##    
##       1   2   3   4   5
##   0  47 167 154 194  67
##   1  47   1  19   1  15
## 
## , ,  = BS.NEFA.0.7
## 
##    
##       1   2   3   4   5
##   0  31 152 133 195  61
##   1  63  16  40   0  21

Can add any other factor variable to this graph, need good notes to recall levels!!! 1 = event of interest

…………………………………………………… REPEAT clustering including Hapto.Log collect all ssign variables into hp10 and format factor variables as numeric dummy variables; check pair-wise correlations again

## Observations: 712
## Variables: 10
## $ MS.Milk.Yield       <dbl> 21.2, 23.3, 14.9, 20.7, 11.9, 17.0, 12.5, 11…
## $ MS.Protein          <dbl> 2.79, 3.05, 2.85, 3.42, 2.89, 2.76, 3.30, 3.…
## $ MS.Lactose          <dbl> 4.73, 4.86, 4.97, 4.82, 4.81, 4.80, 4.83, 4.…
## $ MS.S.Cell.Count.Log <dbl> 2.772589, 5.697093, 5.442418, 3.044522, 2.30…
## $ Hfr.or.Cow          <dbl> 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,…
## $ BS.Month_warm       <dbl> 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0,…
## $ CE.Stom.Tension01   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ CE.Stom.Fullness01  <dbl> 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1,…
## $ CE.Lame             <dbl> 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,…
## $ BS.NEFA.0.7         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,…

## Observations: 712
## Variables: 11
## $ MS.Milk.Yield       <dbl> 1.040073139, 1.387694150, -0.002789896, 0.95…
## $ MS.Protein          <dbl> -1.55816442, -0.73740583, -1.36875859, 0.430…
## $ MS.Lactose          <dbl> -0.50740817, 0.16796859, 0.73944124, -0.0398…
## $ MS.S.Cell.Count.Log <dbl> -1.08085884, 1.16364429, 0.96818536, -0.8721…
## $ Hfr.or.Cow          <dbl> 0.6397853, -1.5608290, -1.5608290, 0.6397853…
## $ BS.Month_warm       <dbl> -0.8800408, 1.1347150, 1.1347150, -0.8800408…
## $ CE.Stom.Tension01   <dbl> -0.2746318, -0.2746318, -0.2746318, -0.27463…
## $ CE.Stom.Fullness01  <dbl> -1.1444851, 0.8725282, 0.8725282, 0.8725282,…
## $ CE.Lame             <dbl> 2.7509398, -0.3630016, -0.3630016, -0.363001…
## $ BS.NEFA.0.7         <dbl> -0.4943799, -0.4943799, -0.4943799, -0.49437…
## $ Hapto.Log           <dbl> -0.38442550, -0.68282945, -0.29941937, -0.68…

PCA and k-means clustering of hp11 check for missing values first, then perform PCA, plot and summarize Note: MS.Milk.Yield will be used instead of Milk.Yield (there are 325 missing values in Milk.Yield)

##                MS.Milk.Yield    MS.Protein    MS.Lactose
## N               7.120000e+02  7.120000e+02  7.120000e+02
## mean           -1.216243e-16 -4.765935e-16  5.121147e-16
## Std.Dev.        1.000000e+00  1.000000e+00  1.000000e+00
## min            -2.403030e+00 -2.726167e+00 -1.032635e+01
## Q1             -6.483718e-01 -7.058382e-01 -5.074082e-01
## median         -2.014305e-01 -7.448544e-02  6.406448e-02
## Q3              4.441514e-01  6.200026e-01  6.355371e-01
## max             5.194972e+00  8.511912e+00  2.298003e+00
## missing values  0.000000e+00  0.000000e+00  0.000000e+00
##                MS.S.Cell.Count.Log    Hfr.or.Cow BS.Month_warm
## N                     7.120000e+02  7.120000e+02  7.120000e+02
## mean                 -1.845848e-16  2.337293e-17 -3.695820e-17
## Std.Dev.              1.000000e+00  1.000000e+00  1.000000e+00
## min                  -1.441578e+00 -1.560829e+00 -8.800408e-01
## Q1                   -7.696719e-01 -1.560829e+00 -8.800408e-01
## median               -2.063642e-01  6.397853e-01 -8.800408e-01
## Q3                    6.238880e-01  6.397853e-01  1.134715e+00
## max                   3.500504e+00  6.397853e-01  1.134715e+00
## missing values        0.000000e+00  0.000000e+00  0.000000e+00
##                CE.Stom.Tension01 CE.Stom.Fullness01       CE.Lame
## N                   7.120000e+02       7.120000e+02  7.120000e+02
## mean               -1.972882e-17      -1.551579e-16  1.830139e-17
## Std.Dev.            1.000000e+00       1.000000e+00  1.000000e+00
## min                -2.746318e-01      -1.144485e+00 -3.630016e-01
## Q1                 -2.746318e-01      -1.144485e+00 -3.630016e-01
## median             -2.746318e-01       8.725282e-01 -3.630016e-01
## Q3                 -2.746318e-01       8.725282e-01 -3.630016e-01
## max                 3.636125e+00       8.725282e-01  2.750940e+00
## missing values      0.000000e+00       0.000000e+00  0.000000e+00
##                  BS.NEFA.0.7     Hapto.Log
## N               7.120000e+02  7.120000e+02
## mean           -5.049915e-17  1.601124e-17
## Std.Dev.        1.000000e+00  1.000000e+00
## min            -4.943799e-01 -9.557153e-01
## Q1             -4.943799e-01 -6.828294e-01
## median         -4.943799e-01 -4.854276e-01
## Q3             -4.943799e-01  3.495475e-01
## max             2.019895e+00  2.908128e+00
## missing values  0.000000e+00  0.000000e+00

## Importance of components:
##                           Comp.1    Comp.2    Comp.3    Comp.4     Comp.5
## Standard deviation     1.3547281 1.2714653 1.1659407 1.0664121 1.00968509
## Proportion of Variance 0.1670791 0.1471725 0.1237572 0.1035304 0.09280889
## Cumulative Proportion  0.1670791 0.3142516 0.4380088 0.5415392 0.63434810
##                            Comp.6     Comp.7     Comp.8     Comp.9
## Standard deviation     0.92236077 0.87441102 0.84447153 0.78945510
## Proportion of Variance 0.07744963 0.06960636 0.06492138 0.05673781
## Cumulative Proportion  0.71179773 0.78140410 0.84632547 0.90306329
##                           Comp.10    Comp.11
## Standard deviation     0.74291139 0.71616262
## Proportion of Variance 0.05024487 0.04669184
## Cumulative Proportion  0.95330816 1.00000000

Note: need 3 PC’s to reach 50% variance…not a strong clustering, but acceptable Note 2: Hapto.Log was NOT used for the clustering

these graphs are not very useful, but they show the variance explained by the clustering per PC

save first 2 PC’s as data frame

##         2 groups    3 groups   4 groups   5 groups    6 groups    7 groups
## SSE 1553.5926985 1084.317669 856.218233 725.430869 606.8355549 498.7290525
## ssi    0.7589306    1.914652   1.206969   1.672581   0.5223833   0.6090285
##        8 groups    9 groups   10 groups   11 groups   12 groups
## SSE 420.8754657 377.4103279 339.3894363 305.8605196 277.9890049
## ssi   0.5992915   0.6208762   0.6175573   0.6631954   0.6666952
##       13 groups   14 groups   15 groups  16 groups   17 groups   18 groups
## SSE 254.3820623 233.5398266 217.3774045 203.260330 191.2415393 180.7887876
## ssi   0.7676859   0.7652891   0.9378094   0.715831   0.7740985   0.9852646
##       19 groups 20 groups
## SSE 171.1566582 161.99590
## ssi   0.9947309   1.00861

use 3 clusters based on maximum in ssi graph, could use 5 as well like before the Silhouette plot is not working; and number of observations per cluster

## 
##   1   2   3 
## 143 283 286

distribution of observations over 4 clusters is quite even

## 
##   1   2   3 
## 143 283 286
## Warning: Removed 1 rows containing missing values (geom_point).

Note: Hapto.Log, CE.Lame, BS.NEFA.0.7, MS.S.Cell.Count and BS.Month_warm are in the same direction and define one cluster on the right. The CE.Stom.Tension01, CE.Stom.Fullness01 and other variable axis are in the oposite direction defining the other two clusters

…. cbind cluster numbers into data set hp10, i.e. the non centered, non-scaled data set

cbind cluster numbers into data set hp9, the original data set, and save as RData set, as well

Plots and descriptives of variables per cluster

## 'data.frame':    712 obs. of  6 variables:
##  $ MS.Milk.Yield      : num  21.2 23.3 14.9 20.7 11.9 17 12.5 11.4 36.8 12.8 ...
##  $ MS.Protein         : num  2.79 3.05 2.85 3.42 2.89 2.76 3.3 3.7 2.86 3.37 ...
##  $ MS.Lactose         : num  4.73 4.86 4.97 4.82 4.81 4.8 4.83 4.81 4.82 4.85 ...
##  $ MS.S.Cell.Count.Log: num  2.77 5.7 5.44 3.04 2.3 ...
##  $ Hapto.Log          : num  6.39 5.63 6.61 5.63 7.55 ...
##  $ clusterHapto       : int  2 2 2 3 2 1 2 3 1 2 ...

cbind the Hapto.Log and Hapto.0.35 varaibles into the data set

## Observations: 712
## Variables: 13
## $ MS.Milk.Yield       <dbl> 21.2, 23.3, 14.9, 20.7, 11.9, 17.0, 12.5, 11…
## $ MS.Protein          <dbl> 2.79, 3.05, 2.85, 3.42, 2.89, 2.76, 3.30, 3.…
## $ MS.Lactose          <dbl> 4.73, 4.86, 4.97, 4.82, 4.81, 4.80, 4.83, 4.…
## $ MS.S.Cell.Count.Log <dbl> 2.772589, 5.697093, 5.442418, 3.044522, 2.30…
## $ Hfr.or.Cow          <dbl> 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,…
## $ BS.Month_warm       <dbl> 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0,…
## $ CE.Stom.Tension01   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ CE.Stom.Fullness01  <dbl> 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1,…
## $ CE.Lame             <dbl> 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,…
## $ BS.NEFA.0.7         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,…
## $ Hapto.Log           <dbl> 6.392754, 5.634790, 6.608675, 5.634790, 7.55…
## $ clusterHapto        <int> 2, 2, 2, 3, 2, 1, 2, 3, 1, 2, 2, 2, 2, 3, 3,…
## $ V2                  <fct> 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0,…
## 'data.frame':    712 obs. of  6 variables:
##  $ MS.Milk.Yield      : num  21.2 23.3 14.9 20.7 11.9 17 12.5 11.4 36.8 12.8 ...
##  $ MS.Protein         : num  2.79 3.05 2.85 3.42 2.89 2.76 3.3 3.7 2.86 3.37 ...
##  $ MS.Lactose         : num  4.73 4.86 4.97 4.82 4.81 4.8 4.83 4.81 4.82 4.85 ...
##  $ MS.S.Cell.Count.Log: num  2.77 5.7 5.44 3.04 2.3 ...
##  $ Hapto.Log          : num  6.39 5.63 6.61 5.63 7.55 ...
##  $ clusterHapto       : int  2 2 2 3 2 1 2 3 1 2 ...

Note: could add any other numeric variable of interest to the graph

descriptives by cluster for MS.Milk.Yield

## $`1`
##                      [,1]
## N              143.000000
## mean            13.697203
## Std.Dev.         5.425695
## min              0.400000
## Q1              10.500000
## median          12.900000
## Q3              16.650000
## max             37.600000
## missing values   0.000000
## 
## $`2`
##                      [,1]
## N              283.000000
## mean            14.879859
## Std.Dev.         5.416116
## min              0.600000
## Q1              11.100000
## median          13.600000
## Q3              17.150000
## max             40.600000
## missing values   0.000000
## 
## $`3`
##                      [,1]
## N              286.000000
## mean            15.563287
## Std.Dev.         6.795877
## min              1.400000
## Q1              11.125000
## median          14.500000
## Q3              17.900000
## max             46.300000
## missing values   0.000000

descriptives by cluster for MS.Protein

## $`1`
##                       [,1]
## N              143.0000000
## mean             3.1765035
## Std.Dev.         0.2998597
## min              2.4200000
## Q1               2.9750000
## median           3.1600000
## Q3               3.3900000
## max              3.9600000
## missing values   0.0000000
## 
## $`2`
##                       [,1]
## N              283.0000000
## mean             3.1444523
## Std.Dev.         0.2200241
## min              2.6200000
## Q1               2.9700000
## median           3.1400000
## Q3               3.2800000
## max              3.8900000
## missing values   0.0000000
## 
## $`3`
##                       [,1]
## N              286.0000000
## mean             3.4748252
## Std.Dev.         0.3097874
## min              2.7400000
## Q1               3.2800000
## median           3.4700000
## Q3               3.6400000
## max              5.9800000
## missing values   0.0000000

descriptives by cluster for MS.Lactose

## $`1`
##                       [,1]
## N              143.0000000
## mean             4.7400699
## Std.Dev.         0.1795201
## min              4.2400000
## Q1               4.6600000
## median           4.7700000
## Q3               4.8500000
## max              5.1000000
## missing values   0.0000000
## 
## $`2`
##                       [,1]
## N              283.0000000
## mean             4.9433569
## Std.Dev.         0.1273086
## min              4.5200000
## Q1               4.8550000
## median           4.9300000
## Q3               5.0400000
## max              5.2700000
## missing values   0.0000000
## 
## $`3`
##                       [,1]
## N              286.0000000
## mean             4.7569930
## Std.Dev.         0.1954088
## min              2.8400000
## Q1               4.6700000
## median           4.7900000
## Q3               4.8600000
## max              5.1300000
## missing values   0.0000000

descriptives by cluster for MS.S.Cell.Count

## $`1`
##                      [,1]
## N              143.000000
## mean             5.111426
## Std.Dev.         1.459594
## min              2.302585
## Q1               3.890987
## median           5.247024
## Q3               6.055595
## max              8.741935
## missing values   0.000000
## 
## $`2`
##                       [,1]
## N              283.0000000
## mean             3.5697581
## Std.Dev.         0.9276868
## min              2.3025851
## Q1               2.8332133
## median           3.4657359
## Q3               4.1023359
## max              6.9641356
## missing values   0.0000000
## 
## $`3`
##                      [,1]
## N              286.000000
## mean             4.320388
## Std.Dev.         1.223714
## min              2.302585
## Q1               3.367296
## median           4.085941
## Q3               5.153292
## max              8.674368
## missing values   0.000000

descriptives by cluster for Hapto.Log

## $`1`
##                      [,1]
## N              143.000000
## mean            10.883418
## Std.Dev.         2.612452
## min              5.634790
## Q1               8.891110
## median          11.596163
## Q3              13.026715
## max             14.756045
## missing values   0.000000
## 
## $`2`
##                      [,1]
## N              283.000000
## mean             6.649202
## Std.Dev.         1.665600
## min              4.976734
## Q1               5.634790
## median           5.823046
## Q3               7.313049
## max             13.774741
## missing values   0.000000
## 
## $`3`
##                      [,1]
## N              286.000000
## mean             6.324584
## Std.Dev.         1.464240
## min              4.941642
## Q1               5.634790
## median           5.634790
## Q3               6.503165
## max             12.508656
## missing values   0.000000

Note: if we defined a new cut-off for Hapto.Log above the minimum of cluster 1, exp(5.63479) = 280 This is a very low value compared to the other cut-offs from literature…but…

We could separate cluster 1 from the other two clusters….cluster 1 is the one defined by the disease variables… Would this be the starting point for a useful prediction model?

#……….. # Repeat graphs for factor variables

## 'data.frame':    712 obs. of  7 variables:
##  $ Hfr.or.Cow        : num  1 0 0 1 1 1 1 1 1 1 ...
##  $ BS.Month_warm     : num  0 1 1 0 0 0 1 0 1 1 ...
##  $ CE.Stom.Tension01 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CE.Stom.Fullness01: num  0 1 1 1 1 0 0 0 0 1 ...
##  $ CE.Lame           : num  1 0 0 0 0 1 0 0 1 0 ...
##  $ BS.NEFA.0.7       : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ clusterHapto      : int  2 2 2 3 2 1 2 3 1 2 ...

## , ,  = Hfr.or.Cow
## 
##     
##          1     2     3
##   0   4.80  6.41  1.60
##   01  0.00  0.00  0.00
##   02  0.00  0.00  0.00
##   1   9.49  7.87 12.69
## 
## , ,  = BS.Month_warm
## 
##     
##          1     2     3
##   0   4.90  6.11 11.54
##   01  0.00  0.00  0.00
##   02  0.00  0.00  0.00
##   1   9.39  8.18  2.75
## 
## , ,  = CE.Stom.Tension01
## 
##     
##          1     2     3
##   0  12.79 13.73 13.09
##   01  0.00  0.00  0.00
##   02  0.00  0.00  0.00
##   1   1.50  0.56  1.20
## 
## , ,  = CE.Stom.Fullness01
## 
##     
##          1     2     3
##   0  10.89  3.74  6.24
##   01  0.00  0.00  0.00
##   02  0.00  0.00  0.00
##   1   3.40 10.55  8.04
## 
## , ,  = CE.Lame
## 
##     
##          1     2     3
##   0   9.09 13.28 13.74
##   01  0.00  0.00  0.00
##   02  0.00  0.00  0.00
##   1   5.19  1.01  0.55
## 
## , ,  = BS.NEFA.0.7
## 
##     
##          1     2     3
##   0   6.49 11.86 13.59
##   01  0.00  0.00  0.00
##   02  0.00  0.00  0.00
##   1   7.79  2.42  0.70
## 
## , ,  = hp9$Cow.Breed
## 
##     
##          1     2     3
##   0   0.00  0.00  0.00
##   01 10.19 10.15  6.69
##   02  4.10  4.14  7.59
##   1   0.00  0.00  0.00
## 'data.frame':    84 obs. of  4 variables:
##  $ levels      : Factor w/ 4 levels "0","01","02",..: 1 2 3 4 1 2 3 4 1 2 ...
##  $ clusterHapto: Factor w/ 3 levels "1","2","3": 1 1 1 1 2 2 2 2 3 3 ...
##  $ variable    : Factor w/ 7 levels "Hfr.or.Cow","BS.Month_warm",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ proportion  : num  4.8 0 0 9.49 6.41 0 0 7.87 1.6 0 ...

## , ,  = Hfr.or.Cow
## 
##     
##        1   2   3
##   0   48 127  32
##   01   0   0   0
##   02   0   0   0
##   1   95 156 254
## 
## , ,  = BS.Month_warm
## 
##     
##        1   2   3
##   0   49 121 231
##   01   0   0   0
##   02   0   0   0
##   1   94 162  55
## 
## , ,  = CE.Stom.Tension01
## 
##     
##        1   2   3
##   0  128 272 262
##   01   0   0   0
##   02   0   0   0
##   1   15  11  24
## 
## , ,  = CE.Stom.Fullness01
## 
##     
##        1   2   3
##   0  109  74 125
##   01   0   0   0
##   02   0   0   0
##   1   34 209 161
## 
## , ,  = CE.Lame
## 
##     
##        1   2   3
##   0   91 263 275
##   01   0   0   0
##   02   0   0   0
##   1   52  20  11
## 
## , ,  = BS.NEFA.0.7
## 
##     
##        1   2   3
##   0   65 235 272
##   01   0   0   0
##   02   0   0   0
##   1   78  48  14
## 
## , ,  = hp9$Cow.Breed
## 
##     
##        1   2   3
##   0    0   0   0
##   01 102 201 134
##   02  41  82 152
##   1    0   0   0

Re-do the logistic regression for Hapto0.35